library(Biobase)
library(survminer)
library(survival)
library(tidyverse)
library(DT)

PATH <- file.path(Sys.getenv("MLAB"), "projects/brcameta/exosome/4t1_brca")
TCGAPATH <- file.path(Sys.getenv("CBM"), "TCGA-GDC/")
DPATH <- file.path(Sys.getenv("CBM"), "otherStudies/RNAseq/2023-03-22-YuhanExosomeBrCa")

do_save <- FALSE

Loading Data

tcga_gsva <- readRDS(file.path(PATH, "data/tcga_gsva_res.rds"))
metabric_gsva <- readRDS(file.path(PATH, "data/metabric_gsva_res.rds"))

# Signatures

all_sigs <- readRDS(file.path(PATH, "data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) > 1]

all_sigs_human <- lapply(all_sigs, babelgene::orthologs, species = "mouse", human = FALSE)
all_sigs_human <- lapply(all_sigs_human, function(x) x %>% dplyr::pull(human_symbol))

Data Filtering

na_filter <- !is.na(tcga_gsva$vital_status)
missing_death_day_filter <- !((tcga_gsva$vital_status == "Dead") & is.na(tcga_gsva$days_to_death))
subtype_filter <- !is.na(tcga_gsva$subtype_selected)
data_filter <- na_filter & missing_death_day_filter & subtype_filter

tcga_gsva <- tcga_gsva[, data_filter]

Censoring: If samples are alive, then they lived at least as long as the day to their last followup. For 5 year threshold, those who died after 5 years, should be right-censored, and labeled to have lived at least as long as ‘days to death’ + 1.

pData(tcga_gsva) <- pData(tcga_gsva) %>%
  rownames_to_column("ID") %>%
  mutate(time = if_else(!is.na(days_to_death), days_to_death, days_to_last_follow_up)) %>%
  mutate(time_5 = if_else(as.numeric(time) < 1825.0, as.numeric(time), 1826.0)) %>%
  mutate(vital_status_1 = if_else(vital_status == "Alive", 1, 2)) %>%
  mutate(vital_status_5 = if_else(vital_status == "Dead" & (time_5 > 1825.0), 1, vital_status_1)) %>%
  column_to_rownames("ID")

pData(metabric_gsva) <- pData(metabric_gsva) %>%
  mutate(time = OS_MONTHS * 30.437) %>%
  mutate(time_5 = if_else(as.numeric(time) < 1825.0, as.numeric(time), 1826.0)) %>%
  mutate(vital_status_1 = if_else(OS_STATUS == "LIVING", 1, 2)) %>%
  mutate(vital_status_5 = if_else(OS_STATUS == "DECEASED" & (time_5 > 1825.0), 1, vital_status_1))

Survfit All subtypes

# Finding Median Thresholds

tcga_threshold_ir_c <- median(exprs(tcga_gsva["ir_c_up", ]) - exprs(tcga_gsva["ir_c_down"]))
tcga_threshold_is_c <- median(exprs(tcga_gsva["is_c_up", ]) - exprs(tcga_gsva["is_c_down", ]))
tcga_threshold_ir_is <- median(exprs(tcga_gsva["ir_is_up", ]) - exprs(tcga_gsva["ir_is_down", ]))
tcga_threshold_tgfb_c <- median(exprs(tcga_gsva["tgfb_c_up", ]) - exprs(tcga_gsva["tgfb_c_down", ]))

metabric_threshold_ir_c <- median(exprs(metabric_gsva["ir_c_up", ]) - exprs(metabric_gsva["ir_c_down"]))
metabric_threshold_is_c <- median(exprs(metabric_gsva["is_c_up", ]) - exprs(metabric_gsva["is_c_down", ]))
metabric_threshold_ir_is <- median(exprs(metabric_gsva["ir_is_up", ]) - exprs(metabric_gsva["ir_is_down", ]))
metabric_threshold_tgfb_c <- median(exprs(metabric_gsva["tgfb_c_up", ]) - exprs(metabric_gsva["tgfb_c_down", ]))

tcga_threshold_ir_c_up <- median(exprs(tcga_gsva["ir_c_up", ]))
tcga_threshold_is_c_up <- median(exprs(tcga_gsva["is_c_up", ]))
tcga_threshold_ir_is_up <- median(exprs(tcga_gsva["ir_is_up", ]))
tcga_threshold_tgfb_c_up <- median(exprs(tcga_gsva["tgfb_c_up", ]))

metabric_threshold_ir_c_up <- median(exprs(metabric_gsva["ir_c_up", ]))
metabric_threshold_is_c_up <- median(exprs(metabric_gsva["is_c_up", ]))
metabric_threshold_ir_is_up <- median(exprs(metabric_gsva["ir_is_up", ]))
metabric_threshold_tgfb_c_up <- median(exprs(metabric_gsva["tgfb_c_up", ]))

Adding GSVA data

tcga_gsva$ir_c <- t(exprs(tcga_gsva["ir_c_up", ]) - exprs(tcga_gsva["ir_c_down"]))
tcga_gsva$is_c <- t(exprs(tcga_gsva["is_c_up", ]) - exprs(tcga_gsva["is_c_down", ]))
tcga_gsva$ir_is <- t(exprs(tcga_gsva["ir_is_up", ]) - exprs(tcga_gsva["ir_is_down", ]))
tcga_gsva$tgfb_c <- t(exprs(tcga_gsva["tgfb_c_up", ]) - exprs(tcga_gsva["tgfb_c_down", ]))

tcga_gsva$ir_c_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_c <= tcga_threshold_ir_c, "low", "high"))
tcga_gsva$is_c_stat <- with(tcga_gsva, ifelse(tcga_gsva$is_c <= tcga_threshold_is_c, "low", "high"))
tcga_gsva$ir_is_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_is <= tcga_threshold_ir_is, "low", "high"))
tcga_gsva$tgfb_c_stat <- with(tcga_gsva, ifelse(tcga_gsva$tgfb_c <= tcga_threshold_tgfb_c, "low", "high"))

tcga_gsva$ir_c_up <- t(exprs(tcga_gsva["ir_c_up", ]))
tcga_gsva$is_c_up <- t(exprs(tcga_gsva["is_c_up", ]))
tcga_gsva$ir_is_up <- t(exprs(tcga_gsva["ir_is_up", ]))
tcga_gsva$tgfb_c_up <- t(exprs(tcga_gsva["tgfb_c_up", ]))
tcga_gsva$ir_c_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_c_up <= tcga_threshold_ir_c_up, "low", "high"))
tcga_gsva$is_c_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$is_c_up <= tcga_threshold_is_c_up, "low", "high"))
tcga_gsva$ir_is_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_is_up <= tcga_threshold_ir_is_up, "low", "high"))
tcga_gsva$tgfb_c_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$tgfb_c_up <= tcga_threshold_tgfb_c_up, "low", "high"))

metabric_gsva$ir_c <- t(exprs(metabric_gsva["ir_c_up", ]) - exprs(metabric_gsva["ir_c_down"]))
metabric_gsva$is_c <- t(exprs(metabric_gsva["is_c_up", ]) - exprs(metabric_gsva["is_c_down", ]))
metabric_gsva$ir_is <- t(exprs(metabric_gsva["ir_is_up", ]) - exprs(metabric_gsva["ir_is_down", ]))
metabric_gsva$tgfb_c <- t(exprs(metabric_gsva["tgfb_c_up", ]) - exprs(metabric_gsva["tgfb_c_down", ]))
metabric_gsva$ir_c_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_c <= metabric_threshold_ir_c, "low", "high"))
metabric_gsva$is_c_stat <- with(metabric_gsva, ifelse(metabric_gsva$is_c <= metabric_threshold_is_c, "low", "high"))
metabric_gsva$ir_is_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_is <= metabric_threshold_ir_is, "low", "high"))
metabric_gsva$tgfb_c_stat <- with(tcga_gsva, ifelse(metabric_gsva$tgfb_c <= metabric_threshold_tgfb_c, "low", "high"))

metabric_gsva$ir_c_up <- t(exprs(metabric_gsva["ir_c_up", ]))
metabric_gsva$is_c_up <- t(exprs(metabric_gsva["is_c_up", ]))
metabric_gsva$ir_is_up <- t(exprs(metabric_gsva["ir_is_up", ]))
metabric_gsva$tgfb_c_up <- t(exprs(metabric_gsva["tgfb_c_up", ]))
metabric_gsva$ir_c_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_c_up <= metabric_threshold_ir_c_up, "low", "high"))
metabric_gsva$is_c_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$is_c_up <= metabric_threshold_is_c_up, "low", "high"))
metabric_gsva$ir_is_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_is_up <= metabric_threshold_ir_is_up, "low", "high"))
metabric_gsva$tgfb_c_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$tgfb_c_up <= metabric_threshold_tgfb_c_up, "low", "high"))

IR_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

IR_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

IS_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$is_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$is_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$is_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$is_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

IS_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$is_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$is_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$is_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$is_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

IR_IS_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_is_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_is_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_is_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_is_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

IR_IS

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_is_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_is_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_is_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_is_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

TGFB_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$tgfb_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$tgfb_c_up_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$tgfb_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$tgfb_c_up_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

TGFB_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$tgfb_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$tgfb_c_stat),
  data = tcga_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$tgfb_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$tgfb_c_stat),
  data = metabric_gsva,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

Survfit Basal

# Thresholds
tcga_basal_filter <- tcga_gsva$subtype_selected == "BRCA.Basal"
tcga_gsva_basal <- tcga_gsva[, tcga_basal_filter]
metabric_basal_filter <- metabric_gsva$Pam50_SUBTYPE == "Basal"
metabric_gsva_basal <- metabric_gsva[, metabric_basal_filter]

tcga_threshold_ir_c <- median(exprs(tcga_gsva_basal["ir_c_up", ]) - exprs(tcga_gsva_basal["ir_c_down", ]))
tcga_threshold_is_c <- median(exprs(tcga_gsva_basal["is_c_up", ]) - exprs(tcga_gsva_basal["is_c_down", ]))
tcga_threshold_ir_is <- median(exprs(tcga_gsva_basal["ir_is_up", ]) - exprs(tcga_gsva_basal["ir_is_down", ]))
tcga_threshold_tgfb_c <- median(exprs(tcga_gsva_basal["tgfb_c_up", ]) - exprs(tcga_gsva_basal["tgfb_c_down", ]))
metabric_threshold_ir_c <- median(exprs(metabric_gsva_basal["ir_c_up", ]) - exprs(metabric_gsva_basal["ir_c_down", ]))
metabric_threshold_is_c <- median(exprs(metabric_gsva_basal["is_c_up", ]) - exprs(metabric_gsva_basal["is_c_down", ]))
metabric_threshold_ir_is <- median(exprs(metabric_gsva_basal["ir_is_up", ]) - exprs(metabric_gsva_basal["ir_is_down", ]))
metabric_threshold_tgfb_c <- median(exprs(metabric_gsva_basal["tgfb_c_up", ]) - exprs(metabric_gsva_basal["tgfb_c_down", ]))

tcga_threshold_ir_c_up <- median(exprs(tcga_gsva_basal["ir_c_up", ]))
tcga_threshold_is_c_up <- median(exprs(tcga_gsva_basal["is_c_up", ]))
tcga_threshold_ir_is_up <- median(exprs(tcga_gsva_basal["ir_is_up", ]))
tcga_threshold_tgfb_c_up <- median(exprs(tcga_gsva_basal["tgfb_c_up", ]))
metabric_threshold_ir_c_up <- median(exprs(metabric_gsva_basal["ir_c_up", ]))
metabric_threshold_is_c_up <- median(exprs(metabric_gsva_basal["is_c_up", ]))
metabric_threshold_ir_is_up <- median(exprs(metabric_gsva_basal["ir_is_up", ]))
metabric_threshold_tgfb_c_up <- median(exprs(metabric_gsva_basal["tgfb_c_up", ]))

Adding GSVA data

tcga_gsva_basal$ir_c <- t(exprs(tcga_gsva_basal["ir_c_up", ]) - exprs(tcga_gsva_basal["ir_c_down", ]))
tcga_gsva_basal$is_c <- t(exprs(tcga_gsva_basal["is_c_up", ]) - exprs(tcga_gsva_basal["is_c_down", ]))
tcga_gsva_basal$ir_is <- t(exprs(tcga_gsva_basal["ir_is_up", ]) - exprs(tcga_gsva_basal["ir_is_down", ]))
tcga_gsva_basal$tgfb_c <- t(exprs(tcga_gsva_basal["tgfb_c_up", ]) - exprs(tcga_gsva_basal["tgfb_c_down", ]))
metabric_gsva_basal$ir_c <- t(exprs(metabric_gsva_basal["ir_c_up", ]) - exprs(metabric_gsva_basal["ir_c_down", ]))
metabric_gsva_basal$is_c <- t(exprs(metabric_gsva_basal["is_c_up", ]) - exprs(metabric_gsva_basal["is_c_down", ]))
metabric_gsva_basal$ir_is <- t(exprs(metabric_gsva_basal["ir_is_up", ]) - exprs(metabric_gsva_basal["ir_is_down", ]))
metabric_gsva_basal$tgfb_c <- t(exprs(metabric_gsva_basal["tgfb_c_up", ]) - exprs(metabric_gsva_basal["tgfb_c_down", ]))

tcga_gsva_basal$ir_c_up <- t(exprs(tcga_gsva_basal["ir_c_up", ]))
tcga_gsva_basal$is_c_up <- t(exprs(tcga_gsva_basal["is_c_up", ]))
tcga_gsva_basal$ir_is_up <- t(exprs(tcga_gsva_basal["ir_is_up", ]))
tcga_gsva_basal$tgfb_c_up <- t(exprs(tcga_gsva_basal["tgfb_c_up", ]))
metabric_gsva_basal$ir_c_up <- t(exprs(metabric_gsva_basal["ir_c_up", ]))
metabric_gsva_basal$is_c_up <- t(exprs(metabric_gsva_basal["is_c_up", ]))
metabric_gsva_basal$ir_is_up <- t(exprs(metabric_gsva_basal["ir_is_up", ]))
metabric_gsva_basal$tgfb_c_up <- t(exprs(metabric_gsva_basal["tgfb_c_up", ]))

tcga_gsva_basal$ir_c_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_c <= tcga_threshold_ir_c, "low", "high"))
tcga_gsva_basal$is_c_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$is_c <= tcga_threshold_is_c, "low", "high"))
tcga_gsva_basal$ir_is_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_is <= tcga_threshold_ir_is, "low", "high"))
tcga_gsva_basal$tgfb_c_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$tgfb_c <= tcga_threshold_tgfb_c, "low", "high"))
tcga_gsva_basal$ir_c_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_c_up <= tcga_threshold_ir_c_up, "low", "high"))
tcga_gsva_basal$is_c_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$is_c_up <= tcga_threshold_is_c_up, "low", "high"))
tcga_gsva_basal$ir_is_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_is_up <= tcga_threshold_ir_is_up, "low", "high"))
tcga_gsva_basal$tgfb_c_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$tgfb_c_up <= tcga_threshold_tgfb_c_up, "low", "high"))

metabric_gsva_basal$ir_c_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_c <= metabric_threshold_ir_c, "low", "high"))
metabric_gsva_basal$is_c_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$is_c <= metabric_threshold_is_c, "low", "high"))
metabric_gsva_basal$ir_is_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_is <= metabric_threshold_ir_is, "low", "high"))
metabric_gsva_basal$tgfb_c_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$tgfb_c <= metabric_threshold_tgfb_c, "low", "high"))
metabric_gsva_basal$ir_c_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_c_up <= metabric_threshold_ir_c_up, "low", "high"))
metabric_gsva_basal$is_c_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$is_c_up <= metabric_threshold_is_c_up, "low", "high"))
metabric_gsva_basal$ir_is_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_is_up <= metabric_threshold_ir_is_up, "low", "high"))
metabric_gsva_basal$tgfb_c_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$tgfb_c_up <= metabric_threshold_tgfb_c_up, "low", "high"))

IR_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

IR_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

IS_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$is_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$is_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$is_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$is_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

IS_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$is_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$is_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$is_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$is_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

IR_IS_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_is_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_is_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_is_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_is_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

IR_IS

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_is_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_is_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_is_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_is_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "Metabric w/ 5-year threshold"
)

TGFB_C_UP

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$tgfb_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$tgfb_c_up_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$tgfb_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$tgfb_c_up_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

TGFB_C

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$tgfb_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$tgfb_c_stat),
  data = tcga_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "TCGA w/ 5-year threshold"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$tgfb_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC"
)

ggsurvplot(
  fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$tgfb_c_stat),
  data = metabric_gsva_basal,
  xlab = "Days",
  ylab = "Overall survival probability",
  conf.int = TRUE,
  pval = TRUE,
  title = "METABRIC w/ 5-year threshold"
)

Cox Proportional Hazard

TCGA

pData(tcga_gsva)$subtype_selected <- relevel(factor(pData(tcga_gsva)$subtype_selected), "BRCA.Normal")
tcga.cox1 <- coxph(Surv(as.numeric(time), vital_status_1) ~ subtype_selected, data = pData(tcga_gsva))
tcga.cox2 <- coxph(Surv(as.numeric(time), vital_status_1) ~ ir_is + subtype_selected, data = pData(tcga_gsva))
tcga.cox3 <- coxph(Surv(as.numeric(time), vital_status_1) ~ ir_is + subtype_selected + age_at_index, data = pData(tcga_gsva))
anova(tcga.cox1, tcga.cox2, tcga.cox3)
summary(tcga.cox3)
## Call:
## coxph(formula = Surv(as.numeric(time), vital_status_1) ~ ir_is + 
##     subtype_selected + age_at_index, data = pData(tcga_gsva))
## 
##   n= 1208, number of events= 199 
## 
##                                coef exp(coef) se(coef)      z Pr(>|z|)    
## ir_is                      -0.16597   0.84707  0.20608 -0.805   0.4206    
## subtype_selectedBRCA.Basal -0.57840   0.56080  0.25338 -2.283   0.0224 *  
## subtype_selectedBRCA.Her2   0.03178   1.03229  0.30946  0.103   0.9182    
## subtype_selectedBRCA.LumA  -0.89462   0.40876  0.20735 -4.315 1.60e-05 ***
## subtype_selectedBRCA.LumB  -0.38217   0.68238  0.28427 -1.344   0.1788    
## age_at_index                0.03465   1.03525  0.00563  6.154 7.54e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                            exp(coef) exp(-coef) lower .95 upper .95
## ir_is                         0.8471     1.1805    0.5656    1.2686
## subtype_selectedBRCA.Basal    0.5608     1.7832    0.3413    0.9215
## subtype_selectedBRCA.Her2     1.0323     0.9687    0.5628    1.8933
## subtype_selectedBRCA.LumA     0.4088     2.4464    0.2723    0.6137
## subtype_selectedBRCA.LumB     0.6824     1.4655    0.3909    1.1912
## age_at_index                  1.0353     0.9659    1.0239    1.0467
## 
## Concordance= 0.696  (se = 0.022 )
## Likelihood ratio test= 67.52  on 6 df,   p=1e-12
## Wald test            = 70.84  on 6 df,   p=3e-13
## Score (logrank) test = 72.85  on 6 df,   p=1e-13

Metabric

pData(metabric_gsva)$Pam50_SUBTYPE <- relevel(factor(pData(metabric_gsva)$Pam50_SUBTYPE), "Normal")
metabric.cox1 <- coxph(Surv(as.numeric(time), vital_status_1) ~ Pam50_SUBTYPE, data = pData(metabric_gsva))
metabric.cox2 <- coxph(Surv(as.numeric(time), vital_status_1) ~ ir_is + Pam50_SUBTYPE, data = pData(metabric_gsva))
metabric.cox3 <- coxph(Surv(as.numeric(time), vital_status_1) ~ ir_is + Pam50_SUBTYPE + AGE_AT_DIAGNOSIS, data = pData(metabric_gsva))
anova(metabric.cox1, metabric.cox2, metabric.cox3)
summary(metabric.cox3)
## Call:
## coxph(formula = Surv(as.numeric(time), vital_status_1) ~ ir_is + 
##     Pam50_SUBTYPE + AGE_AT_DIAGNOSIS, data = pData(metabric_gsva))
## 
##   n= 1980, number of events= 1143 
## 
##                         coef exp(coef)  se(coef)      z Pr(>|z|)    
## ir_is               0.056306  1.057922  0.087440  0.644   0.5196    
## Pam50_SUBTYPEBasal  0.211357  1.235353  0.128448  1.645   0.0999 .  
## Pam50_SUBTYPEHer2   0.301280  1.351588  0.131973  2.283   0.0224 *  
## Pam50_SUBTYPELumA  -0.222111  0.800826  0.117074 -1.897   0.0578 .  
## Pam50_SUBTYPELumB   0.088125  1.092124  0.127894  0.689   0.4908    
## Pam50_SUBTYPENC     0.114633  1.121462  0.460122  0.249   0.8033    
## AGE_AT_DIAGNOSIS    0.036491  1.037165  0.002729 13.371   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                    exp(coef) exp(-coef) lower .95 upper .95
## ir_is                 1.0579     0.9452    0.8913     1.256
## Pam50_SUBTYPEBasal    1.2354     0.8095    0.9604     1.589
## Pam50_SUBTYPEHer2     1.3516     0.7399    1.0435     1.751
## Pam50_SUBTYPELumA     0.8008     1.2487    0.6366     1.007
## Pam50_SUBTYPELumB     1.0921     0.9156    0.8500     1.403
## Pam50_SUBTYPENC       1.1215     0.8917    0.4551     2.763
## AGE_AT_DIAGNOSIS      1.0372     0.9642    1.0316     1.043
## 
## Concordance= 0.611  (se = 0.009 )
## Likelihood ratio test= 233.5  on 7 df,   p=<2e-16
## Wald test            = 223.3  on 7 df,   p=<2e-16
## Score (logrank) test = 228.2  on 7 df,   p=<2e-16